#### The code from this script reproduces all tables and figures from the article "Rethinking Public Funding of Parties and Corruption: Confronting Theoretical Complexity and Challenging Measurement"

### Set your working directory

setwd("your_working_directory")

### Install the required packages if you do not have them installed 

#install.packages(tidyverse)
#install.packages(haven)
#install.packages(fixest)
#install.packages(geepack)
#install.packages(panelr)
#install.packages(broom)
#install.packages(modelsummary)
#install.packages(bestNormalize)
#install.packages(ggeffects)
#install.packages(cowplot)
#install.packages(scales)
#install.packages(corrplot)
#install.packages(ggrepel)
#install.packages(kableExtra)

### Download libraries 
library(tidyverse)
library(haven)
library(fixest)
library(geepack)
library(panelr)
library(broom)
library(modelsummary)
library(bestNormalize)
library(ggeffects)
library(cowplot)
library(scales)
library(corrplot)
library(ggrepel)
library(kableExtra)

### Import datasets 

impact <- read_dta("impact.dta") %>%
  mutate(year_factor = factor(year), 
         country_factor = factor(country)) 
     
### Figure 1: Cross-national and within-country variation in the percentage of firms significantly and decisively affected by informal payments made to political parties and parliamentarians.

impact %>%
  group_by(country) %>%
  mutate(mean_corruption = sum(capture_impact_1, na.rm = TRUE)) %>%
  ggplot(aes(x = factor(year), y = fct_reorder(country, mean_corruption), fill = capture_impact_1)) +
  geom_raster() +
  scale_fill_gradient("Percentage of firms:", low = "white", high = "lightsteelblue4", breaks = seq(0, 50, 5)) +
  coord_flip() +
  theme_light(base_family = "serif") +
  theme(panel.grid = element_blank(),
        plot.caption.position = "plot",
        plot.caption = element_text(size = 10, hjust = 0), 
        axis.text.x = element_text(angle = 90),
        legend.position = "top",
        legend.key.width = unit(2, units = "lines"),
        legend.margin = margin(0, 0, -0.5, 0, unit = "lines")) + 
  labs(x = "", y = "")

ggsave("figure_1.png", height = 2.6, width = 7.5, dpi = 300)
dev.off()
####################################################
### Figure 2: Original and transformed distribution of dependent variable: Percentage of firms affected by informal payments to political parties and parliamentarians

p1d <- ggplot(impact, aes(x = capture_impact_1)) +
  geom_density(color = "lightblue4") +
  theme_minimal(base_family = "serif") +
  theme(panel.border = element_rect(color = "gray40", fill = NA),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_line(colour = "grey90", linewidth = 0.1),
        plot.caption.position = "plot",
        plot.caption = element_text(size = 10, hjust = 0), 
        plot.title = element_text(size = 11, color = "grey30")) +
  labs(title = "A: Original distribution", y = "Density", x = "Percentage of firms")

p2d <- ggplot(impact, aes(x = capture_impact_1_norm)) +
  geom_density(color = "lightblue4") +
  theme_minimal(base_family = "serif") +
  theme(panel.border = element_rect(color = "gray40", fill = NA),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_line(colour = "grey90", linewidth = 0.1),
        plot.title = element_text(size = 11, color = "grey30")) +
  labs(title = "B: Yeo-Johnson transformed distribution", y = "", x = "Transformed values")

plot_grid(p1d, p2d)

ggsave("figure_2.png", width = 6.5, height = 3.2, dpi = 300)
dev.off()
##########################################################
### Figure 3: Cross-national and within-country variation in direct public funding over time: aggregated by BEEPS round

impact %>%
  group_by(country) %>%
  mutate(dpf_country = sum(dpfv_nomin_infl, na.rm = TRUE)) %>%
  ggplot(aes(x = factor(year), y = fct_reorder(country, dpf_country), fill = dpfv_nomin_infl)) +
  geom_raster() +
  scale_fill_gradient("Public funding per vote (USD inflation-adjusted)", low = "white", high = "lightsteelblue4", breaks = seq(0, 12, 1)) +
  coord_flip() +
  theme_light(base_family = "serif") +
  theme(panel.grid = element_blank(),
        plot.caption.position = "plot",
        plot.caption = element_text(size = 10, hjust = 0), 
        axis.text.x = element_text(angle = 90),
        legend.position = "top",
        legend.key.width = unit(2, units = "lines"),
        legend.margin = margin(0, 0, -0.5, 0, unit = "lines")) + 
  labs(x = "", y = "")
ggsave("figure_3.png", height = 2.6, width = 7.5, dpi = 300)
dev.off()

######################################################
### Figure 4: Relationship between the level of public funding and donation limits

ggplot(impact, aes(x = dpfv_nomin_infl, y = private_agreg)) +
  geom_point(color = "lightsteelblue4", size = 2, shape = 1) +
  scale_x_continuous(breaks = seq(0, 14,2)) +
  theme_minimal(base_family = "serif") +
  theme(panel.border = element_rect(color = "gray30", fill = NA),
        plot.caption.position = "plot",
        plot.caption = element_text(size = 10, hjust = 0), 
        panel.grid.minor = element_blank(),
        panel.grid.major = element_line(colour = "grey90", size = 0.1)) +
  labs(x = "Inflation-adjusted public funding per vote (USD)", y = "Donation limits restrictions")

ggsave("figure_4.png", width = 5, height = 3, dpi = 300)
dev.off()

##########################################################
### Figure 5: Relationship between state funding of parties and party corruption by country

impact %>%
  filter(country != "KGZ") %>%
  mutate(dpfv_nomin_infl = ifelse(dpfv_nomin_infl > 0.5, round(dpfv_nomin_infl, 2), round(dpfv_nomin_infl, 3))) %>%
  ggplot(aes(x = dpfv_nomin_infl, y = capture_impact_1)) +
  geom_point(shape = 1, size = 2, color = "lightsteelblue4") +
  geom_smooth(method = "lm", color = "lightsteelblue4", se = F) +
  scale_y_continuous(label = ~ scales::comma(.x, accuracy = 1)) + 
  facet_wrap(~country, scales = "free", ncol = 5) +
  theme_minimal(base_family = "serif") +
  theme(panel.border = element_rect(color = "gray30", fill = NA),
        plot.caption.position = "plot",
        plot.caption = element_text(size = 10, hjust = 0), 
        panel.grid.minor = element_blank(),
        panel.grid.major = element_line(colour = "grey90", linewidth = 0.1),
        strip.background = element_blank(),
        panel.spacing.x = unit(0.1, "lines"),
        panel.spacing.y = unit(-0.25, "lines"),
        axis.title = element_text(size = 10),
        axis.text.x = element_text(size = 8, margin = margin(t = -0.5)), 
        axis.text.y = element_text(size = 8, margin = margin(r = -0.5)), 
        strip.text = element_text(size = 9, color = "grey30")) +
  labs(x = "Inflation-adjusted public funding per vote (USD)", y = "Percentage of firms  significantly affected by informal \npayments to political parties and parliamentarians")

ggsave("figure_5.png", width = 7, height = 7, dpi = 300)
dev.off()

####################################################
### Figure 6: Relationship between state funding of parties and party corruption by BEEPS wave

impact %>%
  mutate(beeps_round = paste0("BEEPS Wave", sep = "-", as.character(year))) %>%
  ggplot(aes(x = dpfv_nomin_infl, y = capture_impact_1)) +
  geom_point(shape = 1, size = 1.5, color = "lightsteelblue4") +
  geom_smooth(method = "lm", color = "lightsteelblue4", se = F) +
  geom_text_repel(aes(label = country), size = 2.5, family = "serif", max.overlaps = 30, segment.color = "grey40", segment.size = 0.2) +
  facet_wrap(~ beeps_round, scales = "free", ncol = 3) +
  theme_minimal(base_family = "serif") +
  theme(panel.border = element_rect(color = "gray30", fill = NA),
        plot.caption.position = "plot",
        plot.caption = element_text(size = 10, hjust = 0), 
        panel.grid.minor = element_blank(),
        panel.grid.major = element_line(colour = "grey90", linewidth = 0.1),
        strip.background = element_blank(),
        panel.spacing.x = unit(0.1, "lines"),
        panel.spacing.y = unit(-0.25, "lines"),
        axis.title = element_text(size = 10),
        axis.text.x = element_text(size = 9, margin = margin(t = -0.5)), 
        axis.text.y = element_text(size = 9, margin = margin(r = -0.5)), 
        strip.text = element_text(size = 10, color = "grey30")) +
  labs(x = "Inflation-adjusted public funding per vote (USD)", y = "Percentage of firms  significantly affected by informal \npayments to political parties and parliamentarians")

ggsave("figure_6.png", width = 7, height = 5, dpi = 300)
dev.off()

#####################################################
#####################################################
### Fit TWFE models for Table 1: Public funding and party corruption: two-way fixed-effects models with alternative versions of public funding

### Declare panel data 
impact_panel <- panel(impact, ~country + year)
  
### Fit two-way fixed-effects models: Dependent Variable - Impact on business performance due to informal payments to political parties and parliamentarians. 

models_twfe_main <- list(
    m1_twfe <- feols(capture_impact_1_norm ~ dpfv_nomin_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) | country + year, data = impact_panel, cluster = ~ country), # Model 1
    m2_twfe <- update(m1_twfe, . ~ . + elect_syst + govern_type2 + gov_size_imf), # Model 2
    m3_twfe <- feols(capture_impact_1_norm ~ dpfv_dprc_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) | country + year, data = impact_panel, cluster = ~ country),  # Model 3
    m4_twfe <- update(m3_twfe, . ~ . + elect_syst + govern_type2 + gov_size_imf)) # Model 4

# Clean labels for the TWFE regression table
model_labels <- 
  c("dpfv_nomin_infl" = "DPF level-inflation adjusted",
    "dpfv_dprc_infl" = "DPF level-inflation adjusted & discounted",
    "private_agreg" = "Donation limits",
    "democracy_scaled" =  "Democracy",
    "log(gdppc_ppp_2017)" = "GDPpc(log)",
    "elect_syst" = "Electoral System", 
    "govern_type2" = "Parliamentary",
    "ethn_fr" = "Ethnic fraction.",
    "gini_disp" = "Inequality",
    "gov_size_imf" = "Government size % GDP")

## Table 1: Public funding and party corruption: two-way fixed-effects models with alternative versions of public funding

modelsummary(models_twfe_main,
             stars = TRUE,
             coef_map = model_labels,
             gof_omit = "AIC|BIC|Std.Error|R2 Within|RMSE",
             #output = "default") # output for table results in viewer panel
             output = "table_1_twfe_main.docx") # output for table results in Microsoft word
  
##################################################
##################################################
### Fit WBRE Models for Table 2: Public funding and party corruption: within-between random-effects models

## Within-Between Random Effects Models (WBRE) with different operationalization of direct public funding 

#### Declaring panel data for WBRE
panel_wbre <- panel_data(impact, id = country, wave = year) 

# Fit WBRE models: DV - 
wbre_models_main <- list(
  m1_wbgee <- wbgee(capture_impact_1_norm ~ dpfv_nomin_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017), data = panel_wbre, model = "w-b", use.wave = F, scale = FALSE, balance.correction = TRUE, cor.str = "ar1"), # model 1
  m2_wbgee <- update(m1_wbgee, .~. + elect_syst + govern_type2 + gov_size_imf), # model 2
  m3_wbgee <- wbgee(capture_impact_1_norm ~ dpfv_dprc_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017), data = panel_wbre, model = "w-b", use.wave = F, scale = FALSE, balance.correction = TRUE, cor.str = "ar1"), # model 3
  m4_wbgee <- update(m3_wbgee, .~. + elect_syst + govern_type2 + gov_size_imf)) # model 4

# Clean term labels

coeff_labels_wbgee <- 
  c("dpfv_nomin_infl" = "DPF level-inflation adjusted (within)",
    "dpfv_dprc_infl" = "DPF level-inflation adjusted & discounted (within)", 
    "private_agreg" = "Donation limits (within)", 
    "democracy_scaled" =  "Democracy (within)", 
    "log(gdppc_ppp_2017)" = "GDPpc(log) (within)", 
    "elect_syst" = "Electoral System (within)", 
    "govern_type2" = "Parliamentary (within)", 
    "ethn_fr" = "Ethnic fraction. (within)", 
    "gini_disp" = "Inequality (within)", 
    "gov_size_imf" = "Government size % GDP (within)", 
    "imean(dpfv_nomin_infl)" = "DPF level-inflation adjusted (between)",
    "imean(dpfv_dprc_infl)" = "DPF level-inflation adjusted & discounted (between)", 
    "imean(private_agreg)" = "Donation limits (between)",
    "imean(democracy_scaled)" =  "Democracy (between)", 
    "imean(log(gdppc_ppp_2017))" = "GDPpc(log) (between)", 
    "imean(elect_syst)" = "Electoral System (between)", 
    "imean(govern_type2)" = "Parliamentary (between)", 
    "imean(ethn_fr)" = "Ethnic fraction. (between)",
    "imean(gini_disp)" = "Inequality (between)", 
    "imean(gov_size_imf)" = "Government size % GDP (between)")

### Table 2: Public funding and party corruption: within-between random-effects models

modelsummary(wbre_models_main, 
             stars = TRUE, 
             coef_map = coeff_labels_wbgee,
             coef_omit = "year|country|(Intercept)",
             #output = "default") # output for table results in viewer panel
             output = "table_2_wbre_main.docx") # output for table results in Microsoft word

########################################################
########################################################
### Fit TWFE models for Table 3: Public funding and corruption: two-way fixed-effects models, with alternative operationalization of public funding and lags

## Use another dataset for models 3 and 4 which is aggregated in a way to obtain evenly-spaced 3-year lags
impact_lags_aggr <- read_dta("impact_lags_aggr.dta") 

# Declare panel data
impact_panel_lags_aggr <- panel(impact_lags_aggr, ~country + year)

models_twfe_main_lag <- list(
   m1_twfe_lag <- feols(capture_impact_1_norm ~ dpfv_nomin_infl_lag + private_agreg_lag + democracy_scaled_lag + log(gdppc_ppp_2017_lag) + elect_syst_lag + govern_type2_lag + gov_size_imf_lag | country + year, data = impact_panel, cluster = ~country), # Model 1, lagged by BEEPS round
  
   m2_twfe_lag <- feols(capture_impact_1_norm ~ dpfv_dprc_infl_lag + private_agreg_lag + democracy_scaled_lag + log(gdppc_ppp_2017_lag) + elect_syst_lag + govern_type2_lag + gov_size_imf_lag | country + year, data = impact_panel, cluster = ~country), # Model 2, lagged by BEEPS round
  
   m3_twfe_lag <- feols(capture_impact_1_norm_lag ~ dpfv_nomin_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) + elect_syst + govern_type2 + gov_size_imf | country + year, data = impact_panel_lags_aggr, cluster = ~country), # Model 3, lagged by 3-year evenly-spaced lags
  
   m4_twfe_lag <- feols(capture_impact_1_norm_lag ~ dpfv_dprc_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) + elect_syst + govern_type2 + gov_size_imf | country + year, data = impact_panel_lags_aggr, cluster = ~country)) # Model 4, lagged by 3-year evenly-spaced lags

# Clean coefficient labels for the regression table in modles using lags

model_labels_main_lag <- 
  c("dpfv_total_lag" = "DPF level-nominal value",
    "dpfv_total" = "DPF level-nominal value",
    "dpfv_nomin_infl_lag" = "DPF level-inflation adjusted",
    "dpfv_nomin_infl" = "DPF level-inflation adjusted",
    "dpfv_dprc_infl_lag" = "DPF level-infaltion adjusted & discounted",
    "dpfv_dprc_infl" = "DPF level-infaltion adjusted & discounted",
    "private_agreg_lag" = "Donation limits",
    "private_agreg" = "Donation limits",
    "democracy_scaled_lag" =  "Democracy", 
    "democracy_scaled" =  "Democracy",
    "log(gdppc_ppp_2017_lag)" = "GDPpc(log)", 
    "log(gdppc_ppp_2017)" = "GDPpc(log)", 
    "elect_syst_lag" = "Electoral System", 
    "elect_syst" = "Electoral System", 
    "govern_type2_lag" = "Parliamentary", 
    "govern_type2" = "Parliamentary", 
    "ethn_fr_lag" = "Ethnic fraction.",
    "ethn_fr" = "Ethnic fraction.", 
    "gini_disp_lag" = "Inequality", 
    "gini_disp" = "Inequality", 
    "gov_size_imf_lag" = "Government size % GDP",
    "gov_size_imf" = "Government size % GDP")

### Table 3: Public funding and corruption: two-way fixed-effects models, with alternative operationalization of public funding and lags 
modelsummary(models_twfe_main_lag,
             stars = TRUE,
             coef_map = model_labels_main_lag,
             gof_omit = "AIC|BIC|Std.Error|R2 Within|RMSE",
             #output = "default") # output for table results in viewer panel
             output = "table_3_twfe_main_lag.docx") # output for table results in Microsoft word

########################################################
########################################################
### Fit WBRE models for Table 4: Public funding and corruption: within-between random-effects models, with alternative operationalization of public funding and lags

## Declare panel data structure
### Main dataset
panel_wbre_lag <- panel_data(impact, id = country, wave = year) 
### Dataset with 3-year uniform lags
panel_wbre_lag_unif <- panel_data(impact_lags_aggr, id = country, wave = year) 

## Fit Within-Between Random Effects Models
wbre_models_main_lag <- list(
  m1_wbgee_lag <- wbgee(capture_impact_1_norm ~ dpfv_nomin_infl_lag + private_agreg_lag + democracy_scaled_lag + log(gdppc_ppp_2017_lag) + elect_syst_lag + govern_type2_lag + gov_size_imf_lag, data = panel_wbre_lag, model = "w-b", use.wave = F, scale = FALSE, balance.correction = TRUE, cor.str = "ar1"), # model 1
  m2_wbgee_lag <- wbgee(capture_impact_1_norm ~ dpfv_dprc_infl_lag + private_agreg_lag + democracy_scaled_lag + log(gdppc_ppp_2017_lag) + elect_syst_lag + govern_type2_lag + gov_size_imf_lag, data = panel_wbre_lag, model = "w-b", use.wave = F, scale = FALSE, balance.correction = TRUE, cor.str = "ar1"), # model 2
  m3_wbgee_lag <- wbgee(capture_impact_1_norm_lag ~ dpfv_nomin_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) + elect_syst + govern_type2 + gov_size_imf, data = panel_wbre_lag_unif, model = "w-b", use.wave = F, scale = FALSE, balance.correction = TRUE, cor.str = "ar1"), # model 3
  m4_wbgee_lag <- wbgee(capture_impact_1_norm_lag ~ dpfv_dprc_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) + elect_syst + govern_type2 + gov_size_imf, data = panel_wbre_lag_unif, model = "w-b", use.wave = F, scale = FALSE, balance.correction = TRUE, cor.str = "ar1")) # model 4

### Clean term labels
coeff_labels_wbgee_lag <- 
  c("dpfv_nomin_infl_lag" = "DPF level-inflation adjusted (within)",
    "dpfv_dprc_infl_lag" = "DPF level-inflation adjusted & discounted (within)", 
    "private_agreg_lag" = "Donation limits (within)",
    "democracy_scaled_lag" =  "Democracy (within)",
    "log(gdppc_ppp_2017_lag)" = "GDPpc(log) (within)",
    "elect_syst_lag" = "Electoral System (within)",
    "govern_type2_lag" = "Parliamentary (within)",
    "ethn_fr_lag" = "Ethnic fraction. (within)",
    "gini_disp_lag" = "Inequality (within)",
    "gov_size_imf_lag" = "Government size % GDP (within)",
    "imean(dpfv_nomin_infl_lag)" = "DPF level-inflation adjusted (between)",
    "imean(dpfv_dprc_infl_lag)" = "DPF level-inflation adjusted & discounted (between)",
    "imean(private_agreg_lag)" = "Donation limits (between)",
    "imean(democracy_scaled_lag)" =  "Democracy (between)",
    "imean(log(gdppc_ppp_2017_lag))" = "GDPpc(log) (between)",
    "imean(elect_syst_lag)" = "Electoral System (between)",
    "imean(govern_type2_lag)" = "Parliamentary (between)",
    "imean(ethn_fr_lag)" = "Ethnic fraction. (between)",
    "imean(gini_disp_lag)" = "Inequality (between)",
    "imean(gov_size_imf_lag)" = "Government size % GDP (between)",
    "dpfv_total" = "DPF level-nominal value (within)", 
    "dpfv_nomin_infl" = "DPF level-inflation adjusted (within)",
    "dpfv_dprc_infl" = "DPF level-inflation adjusted & discounted (within)", 
    "private_agreg" = "Donation limits (within)", 
    "democracy_scaled" =  "Democracy (within)", 
    "log(gdppc_ppp_2017)" = "GDPpc(log) (within)", 
    "elect_syst" = "Electoral System (within)", 
    "govern_type2" = "Parliamentary (within)", 
    "ethn_fr" = "Ethnic fraction. (within)", 
    "gini_disp" = "Inequality (within)", 
    "gov_size_imf" = "Government size % GDP (within)", 
    "imean(dpfv_total)" = "DPF level-nominal value (between)", 
    "imean(dpfv_nomin_infl)" = "DPF level-inflation adjusted (between)",
    "imean(dpfv_dprc_infl)" = "DPF level-inflation adjusted & discounted (between)", 
    "imean(private_agreg)" = "Donation limits (between)",
    "imean(democracy_scaled)" =  "Democracy (between)", 
    "imean(log(gdppc_ppp_2017))" = "GDPpc(log) (between)", 
    "imean(elect_syst)" = "Electoral System (between)", 
    "imean(govern_type2)" = "Parliamentary (between)", 
    "imean(ethn_fr)" = "Ethnic fraction. (between)",
    "imean(gini_disp)" = "Inequality (between)", 
    "imean(gov_size_imf)" = "Government size % GDP (between)")

### Table 4: Public funding and corruption: within-between random-effects models, with alternative operationalization of public funding and lags

modelsummary(wbre_models_main_lag, 
             stars = TRUE,
             coef_map = coeff_labels_wbgee_lag,
             coef_omit = "year|country|(Intercept)",
             #output = "default") # output for table results in viewer panel
             output = "table_4_wbre_main_lag.docx") ## output for table results in Microsoft word

########################################################
########################################################
### Fit two-way FE models with 1 to 5 lags for Table 5: Public funding and corruption: two-way fixed-effects models, with one to five years lags
# For this estimation, we employ a disagreggated dataset with a one- to five-year lag structure

impact_lags_various <- read_dta("impact_lags.dta") 

# Data management to construct 1 to 5 year lags
impact_lags_various <- impact_lags_various %>%
  group_by(country) %>%
  mutate(across(.cols = c(capture_impact_1, capture_impact_1_norm), ~lead(.x), .names = "{.col}_lag1")) %>%
  mutate(across(.cols = c(capture_impact_1, capture_impact_1_norm), ~lead(.x, 2), .names = "{.col}_lag2")) %>%
  mutate(across(.cols = c(capture_impact_1, capture_impact_1_norm), ~lead(.x, 3), .names = "{.col}_lag3")) %>%
  mutate(across(.cols = c(capture_impact_1, capture_impact_1_norm), ~lead(.x, 4), .names = "{.col}_lag4")) %>%
  mutate(across(.cols = c(capture_impact_1, capture_impact_1_norm), ~lead(.x, 5), .names = "{.col}_lag5")) %>%
  mutate(across(.cols = c(dpfv_nomin_infl, dpfv_dprc_infl, private_agreg, democracy_scaled, gdppc_ppp_2017, elect_syst, govern_type2, gov_size_imf, gini_disp, ethn_fr), ~lag(.x), .names = "{.col}_lag1")) %>%
  mutate(across(.cols = c(dpfv_nomin_infl, dpfv_dprc_infl, private_agreg, democracy_scaled, gdppc_ppp_2017, elect_syst, govern_type2, gov_size_imf, gini_disp, ethn_fr), ~lag(.x, 2), .names = "{.col}_lag2")) %>%
  mutate(across(.cols = c(dpfv_nomin_infl, dpfv_dprc_infl, private_agreg, democracy_scaled, gdppc_ppp_2017, elect_syst, govern_type2, gov_size_imf, gini_disp, ethn_fr), ~lag(.x, 3), .names = "{.col}_lag3")) %>%
  mutate(across(.cols = c(dpfv_nomin_infl, dpfv_dprc_infl, private_agreg, democracy_scaled, gdppc_ppp_2017, elect_syst, govern_type2, gov_size_imf, gini_disp, ethn_fr), ~lag(.x, 4), .names = "{.col}_lag4")) %>%
  mutate(across(.cols = c(dpfv_nomin_infl, dpfv_dprc_infl, private_agreg, democracy_scaled, gdppc_ppp_2017, elect_syst, govern_type2, gov_size_imf, gini_disp, ethn_fr), ~lag(.x, 5), .names = "{.col}_lag5")) %>%
  ungroup()


# Declare panel data for fitting models with 1 to 5 lags

impact_panel_lags <- panel(impact_lags_various, ~country + year)

### Fit Two-Way fixed-effects models by 1 to 5 lags 

models_tfe_lags1_5 <- list(
  "Model 1: 1-year lag" = feols(capture_impact_1_norm_lag1 ~ dpfv_dprc_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) + elect_syst + govern_type2 + gov_size_imf | country + year, data = impact_panel_lags, cluster = ~ country), # Model 1
  
  "Model 2: 2-years lag" = feols(capture_impact_1_norm_lag2 ~ dpfv_dprc_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) + elect_syst + govern_type2 + gov_size_imf | country + year, data = impact_panel_lags, cluster = ~ country), # Model 2, 
  
  "Model 3: 3-years lag" = feols(capture_impact_1_norm_lag3 ~ dpfv_dprc_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) + elect_syst + govern_type2 + gov_size_imf | country + year, data = impact_panel_lags, cluster = ~ country), # Model 3
  
  "Model 4: 4-year lag" = feols(capture_impact_1_norm_lag4 ~ dpfv_dprc_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) + elect_syst + govern_type2 + gov_size_imf | country + year, data = impact_panel_lags, cluster = ~ country), # Model 4
  
  "Model 5: 5-years lag" = feols(capture_impact_1_norm_lag5 ~ dpfv_dprc_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) + elect_syst + govern_type2 + gov_size_imf | country + year, data = impact_panel_lags, cluster = ~ country)) # Model 5

# Clean coefficient labels for the regression table
model_labels_lags1_5 <- 
  c("dpfv_dprc_infl" = "DPF level",
    "private_agreg" = "Donation limits", 
    "democracy_scaled" =  "Democracy", 
    "log(gdppc_ppp_2017)" = "GDPpc(log)",
    "elect_syst" = "Electoral System",
    "govern_type2" = "Parliamentary",
    "ethn_fr" = "Ethnic fraction.",
    "gini_disp" = "Inequality", 
    "gov_size_imf" = "Government size % GDP")

### Table 5: Public funding and corruption: two-way fixed-effects models, with one to five years lags
modelsummary(models_tfe_lags1_5,
             stars = TRUE,
             coef_map = model_labels_lags1_5, 
             gof_omit = "AIC|BIC|Std.Error|R2 Within|RMSE",
             #output = "default") # output for table results in viewer panel
             output = "table_tfe_lags1_5.docx") ## output for table results in Microsoft word

############################################
############################################
### Predictions for Figure 7: Predicted corruption level conditional on the amount of public funding

### Fit TWFE via OLS for plotting predicted corruption values
twfe_mod <- lm(capture_impact_1_norm ~ dpfv_dprc_infl + private_agreg + democracy_scaled + log(gdppc_ppp_2017) + elect_syst + govern_type2 + gov_size_imf + country_factor + year_factor, data = impact_panel)

### Predicted corruption values TWFE - Yeo-Johnson transformed scale
twfe_predicted <- ggeffect(twfe_mod, terms = "dpfv_dprc_infl", ci.lvl = 0.9) %>%
  mutate(effect_type = "A: Two-way fixed-effects")

### Predicted corruption values WBRE within effect(equivalent to country fixed-effects model specification) - Yeo-Johnson transformed scale
wbre_predicted_within <- ggemmeans(wbre_models_main[[4]], "dpfv_dprc_infl", ci.lvl = 0.9) %>%
  mutate(effect_type = "B: WBRE: Within-effect")

### Predicted corruption values WBRE between effect (pools within and cross-sectional variation) - Yeo-Johnson transformed scale
wbre_predicted_between <- ggemmeans(wbre_models_main[[4]], "imean(dpfv_dprc_infl)", ci.lvl = 0.9) %>%
  mutate(effect_type ="C: WBRE: Between-effect")

predicted_transfromed_scale <- bind_rows(twfe_predicted, wbre_predicted_within, wbre_predicted_between)

### Seed for replication
set.seed(546)

### I am using here the bestNormalise function 
corruption_normalized <- bestNormalize::bestNormalize(impact$capture_impact_1) # transformation
# See the output with the selection of the best transformation
corruption_normalized

### Converting to original scale: statistical significance line
predict(corruption_normalized, newdata = 0, inverse = TRUE)
### Converting to original scale: predicted values all models 
predicted_rev_twfe <-predict(corruption_normalized, newdata = predicted_transfromed_scale$predicted, inverse = TRUE)
### Converting to original scale: predicted lower confidence interval all models
predicted_conf_low <-predict(corruption_normalized, newdata = predicted_transfromed_scale$conf.low, inverse = TRUE)
### Converting to original scale: predicted upper confidence interval all models
predicted_conf_high <-predict(corruption_normalized, newdata = predicted_transfromed_scale$conf.high, inverse = TRUE)

### Create a data frame with reversed predictions to original scale
predicted_reversed <- data.frame(reversed_estimate = predicted_rev_twfe,
                          reversed_conf_low = predicted_conf_low,
                          reversed_conf_high = predicted_conf_high)

### Combine predicted values for both transformed and reversed estimates and confidence intervals
predicted_both <- bind_cols(predicted_transfromed_scale, predicted_reversed) %>%
  mutate(effect_type2 = case_when(effect_type == "A: Two-way fixed-effects" ~ "D: Two-way fixed-effects", 
                                  effect_type == "B: WBRE: Within-effect" ~ "E: WBRE: Within-effect",
                                  effect_type == "C: WBRE: Between-effect" ~ "F: WBRE: Between-effect"))
                                  

### Plot predictions of original(reversed) scale
original_scale_plot <- predicted_both %>%
  filter(x>=0) %>%
  ggplot(aes(x = x, y = reversed_estimate)) +
  geom_ribbon(aes(ymin = reversed_conf_low, ymax = reversed_conf_high), fill = "gray85", alpha = 0.5) +
  geom_line(color= "grey40") +
  scale_y_continuous(breaks = seq(0, 8, 1)) +
  geom_hline(yintercept = 4.738288, color = "lightsteelblue4", linetype = 2) + 
  facet_wrap(~ effect_type2, scales = "free_x", ncol = 3) +
  theme_light(base_family = "serif", base_size = 11) +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_line(colour = "grey92"),
        strip.text = element_text(size = 11, color = "gray10"),
        strip.background = element_rect(fill = "white"),
        axis.text = element_text(size = 11, color = "gray10"),
        axis.title = element_text(size = 12, color = "gray10"),
        legend.position = "top",
        legend.margin = margin(0,0,-0.5,0, unit = "lines")) +
  labs(x = "Public funding per vote in inflation-adjusted USD, discounted", y = "Predicted corruption: \noriginal scale")

### Plot predictions of Yeo-johnson transformed scale
transformed_scale_plot <- predicted_transfromed_scale %>%
  filter(x>=0) %>%
  ggplot(aes(x = x, y = predicted)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), fill = "gray85", alpha = 0.5) +
  geom_line(color= "grey40") +
  scale_y_continuous(breaks = seq(-2, 1.5, 0.5)) +
  geom_hline(yintercept = 0, color = "lightsteelblue4", linetype = 2) + 
  facet_wrap(~ effect_type, scales = "free_x", ncol = 3) +
  theme_light(base_family = "serif", base_size = 11) +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_line(colour = "grey92"),
        strip.text = element_text(size = 11, color = "gray10"),
        strip.background = element_rect(fill = "white"),
        axis.text = element_text(size = 11, color = "gray10"),
        axis.title = element_text(size = 12, color = "gray10"),
        legend.position = "top",
        legend.margin = margin(0,0,-0.5,0, unit = "lines")) +
  labs(x = "Public funding per vote in inflation-adjusted USD, discounted", y = "Predicted corruption: \ntransformed scale")

cowplot::plot_grid(transformed_scale_plot, original_scale_plot, nrow = 2)
### Save the resulted plot
ggsave("predicted_corruption.png", width = 8, height = 6.6, dpi = 400)
dev.off()
